/***********************************************************************************
File        : compile_dhs_data.do 
Authors     : Ariel Dora Stern & Nick Menzies
Created     : 21 Mar 2010
Modified    : 9 Apr 2010
Description : Compile DHS Data for Gov 2001 Paper
***********************************************************************************/

* Countries + country numbers
	* Burkina Faso (1)
	* Cameroon 	   (2)
	* Ghana	   (3)
	* Kenya	   (4)
	* Tanzania	   (5)

* Survey Datasets
	* Individual Recode
		* BFIR43DFL.dta 
		* CMIR44FL.dta, etc

	* Household Member Recode (*NOT* Household Recode!)
		* BFPR44FL.dta
		* CMPR45FL.dta, etc

	* HIV Datasets
		* BFAR41FL.dta
		* CMAR42FL.dta, etc

*-------------------------------------------*
* We want to recreate Table 2
* To do so, we'll need all of the variables:
*-------------------------------------------*

* Session setup: start with no data in memory, request 500 MB of working
* memory, and make the country-file folder the working directory so the
* relative file names used below (e.g. BF_HIV.dta) resolve there.
clear
set memory 500m
cd "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files"

*-------------------------------*
*Generate Burkina Faso Data Set:
*-------------------------------*

********** Merge datasets

* Build the Burkina Faso analysis files.
*   Input : BF_HIV.dta (master), BF_HH.dta and BF_IND.dta (merged in turn)
*   Output: BF_ALL   - every merged and derived variable
*           BF_SMALL - only the variables listed in the keep statement below
* Old-syntax merge is used, which needs both files sorted on the key
* variables; the master is sorted here, the using files are assumed to be
* stored already sorted.
* NOTE(review): the key variables are presumably the DHS cluster / household /
* line identifiers plus a country code - confirm against how the input files
* were built.
local mergers `"country hv001 hv002 hv003"'
use BF_HIV.dta
sort `mergers'
merge `mergers' using BF_HH.dta
compress
* _merge is not inspected; it is dropped so the next merge can recreate it.
drop _merge
sort `mergers'
merge `mergers' using BF_IND.dta
drop _merge
sort `mergers'

********** Generate variables

* Cluster identifiers: cluster_new offsets the cluster by 1000*country so the
* value also carries the country code.
gen cluster = hivclust
gen cluster_new = cluster + 1000*country

* Survey weights.
* sweight_new is for use in pooled regressions, calculated as
* sweight * sample size for HIV dataset / country population at survey date.
gen sweight = hiv05/1000000
gen tot_pop = 13081911
* r(N) below is the number of observations with a non-missing hiv03.
count if hiv03 != .
gen sweight_new = sweight*r(N)/tot_pop

* Religion dummies from v130 (the code-to-religion mapping differs by country).
* Each dummy is 0/1 when v130 is recorded and missing otherwise.
gen catholic = 0 if v130 != .
	replace catholic = 1 if v130 == 1
gen protestant = 0 if v130 != .
	replace protestant = 1 if v130 == 2
gen muslim = 0 if v130 != .
	replace muslim = 1 if v130 == 3
gen oth_relig = 0 if v130 != .
	replace oth_relig = 1 if v130 == 4 | v130 == 5

* Housing and asset dummies: 1 if the source variable has the listed value,
* 0 for any other recorded value, missing when the source is missing.
gen efloor = v127 == 11 if v127 != .
* toilet is 1 for every recorded v116 code except 31 and 97.
* The explicit v116 != . guard matters: Stata treats missing as larger than
* any number, so without it "v116 != 97 & v116 != 31" is true for missing
* v116 and those observations would be coded 1 instead of staying missing.
gen toilet = v116 == 0 if v116 != .
	replace toilet = 1 if v116 != 97 & v116 != 31 & v116 != .
gen electr = v119 == 1 if v119 != .
gen radio  = v120 == 1 if v120 != .
gen tv     = v121 == 1 if v121 != .
gen refrig = v122 == 1 if v122 != .
gen bike   = v123 == 1 if v123 != .
gen motorc = v124 == 1 if v124 != .
gen car    = v125 == 1 if v125 != .
gen urban  = hv025 == 1 if hv025 != .

* Education and sex.
gen educ = v133
	label var educ "years of education"
gen female = 0 if hv104 == 1
	replace female = 1 if hv104 == 2

* Marital history dummies built from v501, v502, v503 and v505.
gen married = 0 if v502 != .
	replace married = 1 if v502 == 1
gen widow = 0 if v501 != .
	replace widow = 0 if v502 == 0 | v502 == 1
	replace widow = 1 if v501 == 3
gen mult_mar = 0 if v503 != .
	replace mult_mar = 0 if v502 == 0
	replace mult_mar = 1 if v503 == 2
gen was_married = 0 if v502 != .
	replace was_married = 1 if v502 == 2

* HIV test result: 1 if hiv03 == 1, 0 for any other recorded result.
gen hiv = 0 if hiv03 != .
	replace hiv = 1 if hiv03 == 1

* polyg is 1 when v505 is recorded and non-zero; it is forced to 0 when
* v502 is 0 or 2, even if v505 is missing.
gen polyg = v505 != 0 if v505 != .
	replace polyg = 0 if v502 == 0 | v502 == 2

gen agegrp = v013

* One dummy per category; the prefix ends in the country number (1).
quietly tabulate v131, gen(eth1)
	*generates a set of ethnicity dummies
quietly tabulate hv024, gen(reg1)
	*generates a set of region dummies
quietly tabulate v013, gen(age1)
	*generates a set of age dummies


********** Save...

save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\BF_ALL", replace
	*the big data set

* The dummy lists are written out explicitly, so they must match the number
* of categories tabulate produced above.
keep cluster cluster_new sweight sweight_new hv001 hv002 hv003 hiv03 country muslim protestant catholic ///
	oth_relig efloor toilet electr refrig ///
	radio tv bike motorc car urban educ female ///
	married widow mult_mar was_married hiv polyg agegrp ///
	eth11 eth12 eth13 eth14 eth15 eth16 eth17 eth18 eth19 eth110 eth111 ///
	reg11 reg12 reg13 reg14 reg15 reg16 reg17 reg18 reg19 reg110 reg111 reg112 reg113 reg114 ///
	age11 age12 age13 age14 age15 age16 age17
sort `mergers'
save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\BF_SMALL", replace
	*makes the small data set
clear

*-------------------------------*
*Generate Cameroon Data Set:
*-------------------------------*

********** Merge datasets

* Build the Cameroon analysis files.
*   Input : CM_HIV.dta (master), CM_HH.dta and CM_IND.dta (merged in turn)
*   Output: CM_ALL   - every merged and derived variable
*           CM_SMALL - only the variables listed in the keep statement below
* Old-syntax merge is used, which needs both files sorted on the key
* variables; the master is sorted here, the using files are assumed to be
* stored already sorted.
* NOTE(review): the key variables are presumably the DHS cluster / household /
* line identifiers plus a country code - confirm against how the input files
* were built.
local mergers `"country hv001 hv002 hv003"'
use CM_HIV.dta
sort `mergers'
merge `mergers' using CM_HH.dta
compress
* _merge is not inspected; it is dropped so the next merge can recreate it.
drop _merge
sort `mergers'
merge `mergers' using CM_IND.dta
drop _merge
sort `mergers'

********** Generate variables

* Cluster identifiers: cluster_new offsets the cluster by 1000*country so the
* value also carries the country code.
gen cluster = hivclust
gen cluster_new = cluster + 1000*country

* Survey weights.
* sweight_new is for use in pooled regressions, calculated as
* sweight * sample size for HIV dataset / country population at survey date.
gen sweight = hiv05/1000000
gen tot_pop = 17409433
* r(N) below is the number of observations with a non-missing hiv03.
count if hiv03 != .
gen sweight_new = sweight*r(N)/tot_pop

* Religion dummies from v130 (the code-to-religion mapping differs by country).
* Each dummy is 0/1 when v130 is recorded and missing otherwise.
gen catholic = 0 if v130 != .
	replace catholic = 1 if v130 == 1
gen protestant = 0 if v130 != .
	replace protestant = 1 if v130 == 2
gen muslim = 0 if v130 != .
	replace muslim = 1 if v130 == 3
gen oth_relig = 0 if v130 != .
	replace oth_relig = 1 if v130 == 4 | v130 == 6 | v130 == 7

* Housing and asset dummies: 1 if the source variable has the listed value,
* 0 for any other recorded value, missing when the source is missing.
gen efloor = v127 == 11 if v127 != .
* toilet is 1 for every recorded v116 code except 31 and 97.
* The explicit v116 != . guard matters: Stata treats missing as larger than
* any number, so without it "v116 != 97 & v116 != 31" is true for missing
* v116 and those observations would be coded 1 instead of staying missing.
gen toilet = v116 == 0 if v116 != .
	replace toilet = 1 if v116 != 97 & v116 != 31 & v116 != .
gen electr = v119 == 1 if v119 != .
gen radio  = v120 == 1 if v120 != .
gen tv     = v121 == 1 if v121 != .
gen refrig = v122 == 1 if v122 != .
gen bike   = v123 == 1 if v123 != .
gen motorc = v124 == 1 if v124 != .
gen car    = v125 == 1 if v125 != .
gen urban  = hv025 == 1 if hv025 != .

* Education and sex.
gen educ = v133
	label var educ "years of education"
gen female = 0 if hv104 == 1
	replace female = 1 if hv104 == 2

* Marital history dummies built from v501, v502, v503 and v505.
gen married = 0 if v502 != .
	replace married = 1 if v502 == 1
gen widow = 0 if v501 != .
	replace widow = 0 if v502 == 0 | v502 == 1
	replace widow = 1 if v501 == 3
gen mult_mar = 0 if v503 != .
	replace mult_mar = 0 if v502 == 0
	replace mult_mar = 1 if v503 == 2
gen was_married = 0 if v502 != .
	replace was_married = 1 if v502 == 2

* HIV test result: 1 if hiv03 == 1, 0 for any other recorded result.
gen hiv = 0 if hiv03 != .
	replace hiv = 1 if hiv03 == 1

* polyg is 1 when v505 is recorded and non-zero; it is forced to 0 when
* v502 is 0 or 2, even if v505 is missing.
gen polyg = v505 != 0 if v505 != .
	replace polyg = 0 if v502 == 0 | v502 == 2

gen agegrp = v013

* One dummy per category; the prefix ends in the country number (2).
quietly tabulate v131, gen(eth2)
	*generates a set of ethnicity dummies
quietly tabulate hv024, gen(reg2)
	*generates a set of region dummies
quietly tabulate v013, gen(age2)
	*generates a set of age dummies

********** Save...

save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\CM_ALL", replace
	*the big data set

* The dummy lists are written out explicitly, so they must match the number
* of categories tabulate produced above.
keep cluster cluster_new sweight sweight_new hv001 hv002 hv003 hiv03 country muslim protestant catholic ///
	oth_relig efloor toilet electr refrig ///
	radio tv bike motorc car urban educ female ///
	married widow mult_mar was_married hiv polyg agegrp ///
	eth21 eth22 eth23 eth24 eth25 eth26 eth27 eth28 eth29 eth210 ///
	eth211 eth212 eth213 eth214 eth215 eth216 eth217 eth218 eth219 eth220 ///
	eth221 eth222 eth223 eth224 eth225 eth226 eth227 eth228 eth229 eth230 ///
	eth231 eth232 eth233 eth234 eth235 eth236 eth237 eth238 eth239 eth240 ///
	eth241 eth242 eth243 eth244 eth245 eth246 eth247 eth248 eth249 eth250 ///
	reg21 reg22 reg23 reg24 reg25 reg26 reg27 reg28 reg29 reg210 reg211 reg212 ///
	age21 age22 age23 age24 age25 age26 age27
sort `mergers'
save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\CM_SMALL", replace
	*makes the small data set
clear


*-------------------------------*
*Generate Ghana Data Set:
*-------------------------------*

********** Merge datasets

* Build the Ghana analysis files.
*   Input : GH_HIV.dta (master), GH_HH.dta and GH_IND.dta (merged in turn)
*   Output: GH_ALL   - every merged and derived variable
*           GH_SMALL - only the variables listed in the keep statement below
* Old-syntax merge is used, which needs both files sorted on the key
* variables; the master is sorted here, the using files are assumed to be
* stored already sorted.
* NOTE(review): the key variables are presumably the DHS cluster / household /
* line identifiers plus a country code - confirm against how the input files
* were built.
local mergers `"country hv001 hv002 hv003"'
use GH_HIV.dta
sort `mergers'
merge `mergers' using GH_HH.dta
compress
* _merge is not inspected; it is dropped so the next merge can recreate it.
drop _merge
sort `mergers'
merge `mergers' using GH_IND.dta
drop _merge
sort `mergers'

********** Generate variables

* Cluster identifiers: cluster_new offsets the cluster by 1000*country so the
* value also carries the country code.
gen cluster = hivclust
gen cluster_new = cluster + 1000*country

* Survey weights.
* sweight_new is for use in pooled regressions, calculated as
* sweight * sample size for HIV dataset / country population at survey date.
gen sweight = hiv05/1000000
gen tot_pop = 20954557
* r(N) below is the number of observations with a non-missing hiv03.
count if hiv03 != .
gen sweight_new = sweight*r(N)/tot_pop

* Religion dummies from v130 (the code-to-religion mapping differs by country;
* here codes 2-5 are all grouped as protestant and code 0 counts as other).
* Each dummy is 0/1 when v130 is recorded and missing otherwise.
gen catholic = 0 if v130 != .
	replace catholic = 1 if v130 == 1
gen protestant = 0 if v130 != .
	replace protestant = 1 if v130 == 2 | v130 == 3 | v130 == 4 | v130 == 5
gen muslim = 0 if v130 != .
	replace muslim = 1 if v130 == 6
gen oth_relig = 0 if v130 != .
	replace oth_relig = 1 if v130 == 0 | v130 == 7 | v130 == 8

* Housing and asset dummies: 1 if the source variable has the listed value,
* 0 for any other recorded value, missing when the source is missing.
gen efloor = v127 == 11 if v127 != .
* toilet is 1 for every recorded v116 code except 31 and 97.
* The explicit v116 != . guard matters: Stata treats missing as larger than
* any number, so without it "v116 != 97 & v116 != 31" is true for missing
* v116 and those observations would be coded 1 instead of staying missing.
gen toilet = v116 == 0 if v116 != .
	replace toilet = 1 if v116 != 97 & v116 != 31 & v116 != .
gen electr = v119 == 1 if v119 != .
gen radio  = v120 == 1 if v120 != .
gen tv     = v121 == 1 if v121 != .
gen refrig = v122 == 1 if v122 != .
gen bike   = v123 == 1 if v123 != .
gen motorc = v124 == 1 if v124 != .
gen car    = v125 == 1 if v125 != .
gen urban  = hv025 == 1 if hv025 != .

* Education and sex.
gen educ = v133
	label var educ "years of education"
gen female = 0 if hv104 == 1
	replace female = 1 if hv104 == 2

* Marital history dummies built from v501, v502, v503 and v505.
gen married = 0 if v502 != .
	replace married = 1 if v502 == 1
gen widow = 0 if v501 != .
	replace widow = 0 if v502 == 0 | v502 == 1
	replace widow = 1 if v501 == 3
gen mult_mar = 0 if v503 != .
	replace mult_mar = 0 if v502 == 0
	replace mult_mar = 1 if v503 == 2
gen was_married = 0 if v502 != .
	replace was_married = 1 if v502 == 2

* HIV test result: 1 if hiv03 == 1, 0 for any other recorded result.
gen hiv = 0 if hiv03 != .
	replace hiv = 1 if hiv03 == 1

* polyg is 1 when v505 is recorded and non-zero; it is forced to 0 when
* v502 is 0 or 2, even if v505 is missing.
gen polyg = v505 != 0 if v505 != .
	replace polyg = 0 if v502 == 0 | v502 == 2

gen agegrp = v013

* One dummy per category; the prefix ends in the country number (3).
quietly tabulate v131, gen(eth3)
	*generates a set of ethnicity dummies
quietly tabulate hv024, gen(reg3)
	*generates a set of region dummies
quietly tabulate v013, gen(age3)
	*generates a set of age dummies

********** Save...

save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\GH_ALL", replace
	*the big data set

* The dummy lists are written out explicitly, so they must match the number
* of categories tabulate produced above.
keep cluster cluster_new sweight sweight_new hv001 hv002 hv003 hiv03 country muslim protestant catholic ///
	oth_relig efloor toilet electr refrig ///
	radio tv bike motorc car urban educ female ///
	married widow mult_mar was_married hiv polyg agegrp ///
	eth31 eth32 eth33 eth34 eth35 eth36 eth37 eth38 eth39 ///
	reg31 reg32 reg33 reg34 reg35 reg36 reg37 reg38 reg39 reg310 ///
	age31 age32 age33 age34 age35 age36 age37

sort `mergers'
save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\GH_SMALL", replace
	*makes the small data set
clear


*-------------------------------*
*Generate Kenya Data Set:
*-------------------------------*

********** Merge datasets

* Build the Kenya analysis files.
*   Input : KE_HIV.dta (master), KE_HH.dta and KE_IND.dta (merged in turn)
*   Output: KE_ALL   - every merged and derived variable
*           KE_SMALL - only the variables listed in the keep statement below
* Old-syntax merge is used, which needs both files sorted on the key
* variables; the master is sorted here, the using files are assumed to be
* stored already sorted.
* NOTE(review): the key variables are presumably the DHS cluster / household /
* line identifiers plus a country code - confirm against how the input files
* were built.
local mergers `"country hv001 hv002 hv003"'
use KE_HIV.dta
sort `mergers'
merge `mergers' using KE_HH.dta
compress
* _merge is not inspected; it is dropped so the next merge can recreate it.
drop _merge
sort `mergers'
merge `mergers' using KE_IND.dta
drop _merge
sort `mergers'

********** Generate variables

* Cluster identifiers: cluster_new offsets the cluster by 1000*country so the
* value also carries the country code.
gen cluster = hivclust
gen cluster_new = cluster + 1000*country

* Survey weights.
* sweight_new is for use in pooled regressions, calculated as
* sweight * sample size for HIV dataset / country population at survey date.
gen sweight = hiv05/1000000
gen tot_pop = 33779932
* r(N) below is the number of observations with a non-missing hiv03.
count if hiv03 != .
gen sweight_new = sweight*r(N)/tot_pop

* Religion dummies from v130 (the code-to-religion mapping differs by country).
* Each dummy is 0/1 when v130 is recorded and missing otherwise.
gen catholic = 0 if v130 != .
	replace catholic = 1 if v130 == 1
gen protestant = 0 if v130 != .
	replace protestant = 1 if v130 == 2
gen muslim = 0 if v130 != .
	replace muslim = 1 if v130 == 3
gen oth_relig = 0 if v130 != .
	replace oth_relig = 1 if v130 == 4 | v130 == 6

* Housing and asset dummies: 1 if the source variable has the listed value,
* 0 for any other recorded value, missing when the source is missing.
gen efloor = v127 == 11 if v127 != .
* toilet is 1 for every recorded v116 code except 31 and 97.
* The explicit v116 != . guard matters: Stata treats missing as larger than
* any number, so without it "v116 != 97 & v116 != 31" is true for missing
* v116 and those observations would be coded 1 instead of staying missing.
gen toilet = v116 == 0 if v116 != .
	replace toilet = 1 if v116 != 97 & v116 != 31 & v116 != .
gen electr = v119 == 1 if v119 != .
gen radio  = v120 == 1 if v120 != .
gen tv     = v121 == 1 if v121 != .
gen refrig = v122 == 1 if v122 != .
gen bike   = v123 == 1 if v123 != .
gen motorc = v124 == 1 if v124 != .
gen car    = v125 == 1 if v125 != .
gen urban  = hv025 == 1 if hv025 != .

* Education and sex.
gen educ = v133
	label var educ "years of education"
gen female = 0 if hv104 == 1
	replace female = 1 if hv104 == 2

* Marital history dummies built from v501, v502, v503 and v505.
gen married = 0 if v502 != .
	replace married = 1 if v502 == 1
gen widow = 0 if v501 != .
	replace widow = 0 if v502 == 0 | v502 == 1
	replace widow = 1 if v501 == 3
gen mult_mar = 0 if v503 != .
	replace mult_mar = 0 if v502 == 0
	replace mult_mar = 1 if v503 == 2
gen was_married = 0 if v502 != .
	replace was_married = 1 if v502 == 2

* HIV test result: 1 if hiv03 == 1, 0 for any other recorded result.
gen hiv = 0 if hiv03 != .
	replace hiv = 1 if hiv03 == 1

* polyg is 1 when v505 is recorded and non-zero; it is forced to 0 when
* v502 is 0 or 2, even if v505 is missing.
gen polyg = v505 != 0 if v505 != .
	replace polyg = 0 if v502 == 0 | v502 == 2

gen agegrp = v013

* One dummy per category; the prefix ends in the country number (4).
quietly tabulate v131, gen(eth4)
	*generates a set of ethnicity dummies
quietly tabulate hv024, gen(reg4)
	*generates a set of region dummies
quietly tabulate v013, gen(age4)
	*generates a set of age dummies

********** Save...

save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\KE_ALL", replace
	*the big data set

* The dummy lists are written out explicitly, so they must match the number
* of categories tabulate produced above.
keep cluster cluster_new sweight sweight_new hv001 hv002 hv003 hiv03 country muslim protestant catholic ///
	oth_relig efloor toilet electr refrig ///
	radio tv bike motorc car urban educ female ///
	married widow mult_mar was_married hiv polyg agegrp ///
	eth41 eth42 eth43 eth44 eth45 eth46 eth47 eth48 eth49 ///
	eth410 eth411 eth412 eth413 eth414 eth415 ///
	reg41 reg42 reg43 reg44 reg45 reg46 reg47 reg48 ///
	age41 age42 age43 age44 age45 age46 age47

sort `mergers'
save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\KE_SMALL", replace
	*makes the small data set
clear


*-------------------------------*
*Generate Tanzania Data Set:
*-------------------------------*

********** Merge datasets

* Build the Tanzania analysis files.
*   Input : TZ_HIV.dta (master), TZ_HH.dta and TZ_IND.dta (merged in turn)
*   Output: TZ_ALL   - every merged and derived variable
*           TZ_SMALL - only the variables listed in the keep statement below
* Unlike the other countries, no widow dummy and no ethnicity dummies are
* created here (see the notes below).
* Old-syntax merge is used, which needs both files sorted on the key
* variables; the master is sorted here, the using files are assumed to be
* stored already sorted.
* NOTE(review): the key variables are presumably the DHS cluster / household /
* line identifiers plus a country code - confirm against how the input files
* were built.
local mergers `"country hv001 hv002 hv003"'
use TZ_HIV.dta
sort `mergers'
merge `mergers' using TZ_HH.dta
compress
* _merge is not inspected; it is dropped so the next merge can recreate it.
drop _merge
sort `mergers'
merge `mergers' using TZ_IND.dta
drop _merge
sort `mergers'

********** Generate variables

* Cluster identifiers: cluster_new offsets the cluster by 1000*country so the
* value also carries the country code.
gen cluster = hivclust
gen cluster_new = cluster + 1000*country

* Survey weights.
* sweight_new is for use in pooled regressions, calculated as
* sweight * sample size for HIV dataset / country population at survey date.
gen sweight = hiv05/1000000
gen tot_pop = 37945476
* r(N) below is the number of observations with a non-missing hiv03.
count if hiv03 != .
gen sweight_new = sweight*r(N)/tot_pop

* Religion dummies from v130 (the code-to-religion mapping differs by country;
* here code 1 is muslim, 2 catholic and 3 protestant).
* Each dummy is 0/1 when v130 is recorded and missing otherwise.
gen catholic = 0 if v130 != .
	replace catholic = 1 if v130 == 2
gen protestant = 0 if v130 != .
	replace protestant = 1 if v130 == 3
gen muslim = 0 if v130 != .
	replace muslim = 1 if v130 == 1
gen oth_relig = 0 if v130 != .
	replace oth_relig = 1 if v130 == 4 | v130 == 6

* Housing and asset dummies: 1 if the source variable has the listed value,
* 0 for any other recorded value, missing when the source is missing.
gen efloor = v127 == 11 if v127 != .
* toilet is 1 for every recorded v116 code except 31 and 97.
* The explicit v116 != . guard matters: Stata treats missing as larger than
* any number, so without it "v116 != 97 & v116 != 31" is true for missing
* v116 and those observations would be coded 1 instead of staying missing.
gen toilet = v116 == 0 if v116 != .
	replace toilet = 1 if v116 != 97 & v116 != 31 & v116 != .
gen electr = v119 == 1 if v119 != .
gen radio  = v120 == 1 if v120 != .
gen tv     = v121 == 1 if v121 != .
gen refrig = v122 == 1 if v122 != .
gen bike   = v123 == 1 if v123 != .
gen motorc = v124 == 1 if v124 != .
gen car    = v125 == 1 if v125 != .
gen urban  = hv025 == 1 if hv025 != .

* Education and sex.
gen educ = v133
	label var educ "years of education"
gen female = 0 if hv104 == 1
	replace female = 1 if hv104 == 2

* Marital history dummies built from v502, v503 and v505.
gen married = 0 if v502 != .
	replace married = 1 if v502 == 1
*** Widowhood not recorded in Tanzania survey.
gen mult_mar = 0 if v503 != .
	replace mult_mar = 0 if v502 == 0
	replace mult_mar = 1 if v503 == 2
gen was_married = 0 if v502 != .
	replace was_married = 1 if v502 == 2

* HIV test result: 1 if hiv03 == 1, 0 for any other recorded result.
gen hiv = 0 if hiv03 != .
	replace hiv = 1 if hiv03 == 1

* polyg is 1 when v505 is recorded and non-zero; it is forced to 0 when
* v502 is 0 or 2, even if v505 is missing.
gen polyg = v505 != 0 if v505 != .
	replace polyg = 0 if v502 == 0 | v502 == 2

gen agegrp = v013

*** Ethnicity not recorded in Tanzania survey.
* One dummy per category; the prefix ends in the country number (5).
quietly tabulate hv024, gen(reg5)
	*generates a set of region dummies
quietly tabulate v013, gen(age5)
	*generates a set of age dummies

********** Save...

save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\TZ_ALL", replace
	*the big data set

* The dummy lists are written out explicitly, so they must match the number
* of categories tabulate produced above.
keep cluster cluster_new sweight sweight_new hv001 hv002 hv003 hiv03 country muslim protestant catholic ///
	oth_relig efloor toilet electr refrig ///
	radio tv bike motorc car urban educ female ///
	married mult_mar was_married hiv polyg agegrp ///
	reg51 reg52 reg53 reg54 reg55 reg56 reg57 reg58 reg59 reg510 reg511 ///
	reg512 reg513 reg514 reg515 reg516 reg517 reg518 reg519 reg520 reg521 ///
	age51 age52 age53 age54 age55 age56 age57
sort `mergers'
save "C:\Users\Nick\Documents\Harvard\Courses\Gov 2001\Replication Paper\Data files\Country Files\TZ_SMALL", replace
	*makes the small data set
clear
